include constants.doh

// See the README for instructions on how to access this data
import delimited using "input/pipeline/hs_rd_output.csv", clear

// Total Syrian friends = Syrian friends from school + Syrian friends from outside school.
gen syrian_friends = syrian_friends_school + syrian_friends_not_school

// Cluster identifier used for the standard errors: one group per school x cohort year.
fegen treatment_cluster = group(school_id cohort_year)

// high_bias is 1 for observations in the first of the two xtile groups of
// nuts3_relative_friending_ptile and 0 otherwise (including observations where the
// percentile is missing). The split is computed before the sample restriction below.
// NOTE(review): xtile group 1 holds the lower values; confirm that "high bias" is meant
// to correspond to the lower half of this percentile variable.
xtile high_bias_temp = nuts3_relative_friending_ptile, nquantiles(2)
gen high_bias = (high_bias_temp == 1)
drop high_bias_temp

// Keep only observations flagged in_perfect_right_way, then number the remaining nuts3 values.
keep if in_perfect_right_way == 1
fegen nuts3_factor = group(nuts3)

// Shorten the prefixes so derived names stay under Stata's variable-name character limit
rename syrian_friends* sf*
rename friends_* f_*

local age_to_sum_to = 20

//
// Replace each overall Syrian-friends total with one that only counts the friends
// added up to age 20 (since this is the last year for which all of our sample is
// of age). The original totals are kept under the suffix _old.
//
foreach suffix in "" "_school" "_not_school" "_nc_early" "_dnc_early" {
	rename sf`suffix' sf`suffix'_old
	gen sf`suffix' = 0
	// Accumulate the per-age friend counts from age 13 through the cutoff age.
	forvalues age = 13/`age_to_sum_to' {
		replace sf`suffix' = sf`suffix' + f_`age'`suffix'
	}
}

// This code produces the time-series graph by doing a separate regression for each point, featuring
// a different subset of eligible friendships.
//
// For every friendship definition `t' and every age `i' from 13 to 22, the running total f_`t' of
// friends added up to that age is regressed on `treated'; the coefficient and standard error are
// posted to the frame ests`aggregation', which is then plotted with 95% intervals (+/- 1.96 SE).
foreach aggregation in "_cumulative" {
	cap frame drop ests`aggregation'
	frame create ests`aggregation' double(year point_estimate standard_error) str20(est_type) str10(subgroup)
	foreach t in "" "_not_school" "_nc_early" {
		if "`aggregation'" == "_cumulative" {
			// Running total of friends added so far for this friendship definition.
			gen f_`t' = 0
		}
		foreach i of numlist 13/22 {
			if "`aggregation'" == "_cumulative" {
				replace f_`t' = f_`t' + f_`i'`t'
			}
			foreach condition in "right" {
				// Sample restriction for this subgroup. Only "right" is currently looped over;
				// the low/high-bias branches are kept so they can be re-enabled.
				if ("`condition'" == "right") {
					local restriction = "if in_right_way == 1"
				}
				else if ("`condition'" == "rightl") {
					local restriction = "if in_right_way == 1 & high_bias == 0"
				}
				else if ("`condition'" == "righth") {
					local restriction = "if in_right_way == 1 & high_bias == 1"
				}
				// Progress indicator: the age currently being estimated.
				di "`i'"
				if "`aggregation'" == "" {
					quietly reghdfe f_`i'`t' treated `restriction', absorb(i.birth_year#i.nuts3_factor school_id) vce(cluster treatment_cluster)
					estimates store f_`i'`t'_`condition'
				}
				else if "`aggregation'" == "_cumulative" {
					quietly reghdfe f_`t' treated `restriction', absorb(i.birth_year#i.nuts3_factor school_id) vce(cluster treatment_cluster)
				}
				frame post ests`aggregation' (`i') (_b[treated]) (_se[treated]) ("`t'") ("`condition'")
			}
		}
	}
	frame change ests`aggregation'
	gen low_est = point_estimate - 1.96 * standard_error
	gen hi_est = point_estimate + 1.96 * standard_error
	// Offset each series horizontally so the markers and intervals at a given age do not overlap.
	replace year = year -.3 if est_type == ""
	replace year = year -.1 if est_type == "_not_school"
	replace year = year +.1 if est_type == "_nc_early"
	replace year = year +.3 if est_type == "_dnc_early"
	// NOTE(review): no "_dnc_early" estimates are posted by the loop above, so the purple series
	// below is empty while its legend entry is still listed; confirm whether it should be
	// dropped or "_dnc_early" added to the loop over `t'.
	foreach condition in "right"  {
		twoway ///
			(scatter point_estimate year if est_type == "", mcolor(blue)) ///
			(rcap low_est hi_est year if est_type == "", lcolor(blue)) ///
			(scatter point_estimate year if est_type == "_not_school", mcolor(red)) ///
			(rcap low_est hi_est year if est_type == "_not_school", lcolor(red)) ///
			(scatter point_estimate year if est_type == "_nc_early", mcolor(green)) ///
			(rcap low_est hi_est year if est_type == "_nc_early", lcolor(green)) ///
			(scatter point_estimate year if est_type == "_dnc_early", mcolor(purple)) ///
			(rcap low_est hi_est year if est_type == "_dnc_early", lcolor(purple)) ///
			if subgroup == "`condition'", ///
			yline(0, lcolor(gray) lpattern(dash)) ///
			legend( ///
				order(1 3 5 7) ///
				label(1 "All") ///
				label(3 "Excluding Classmates") ///
				label(5 "Excluding Friends of Syrian Classmates") ///
				label(7 "Excluding Friends of All Classmates") ///
				size(small) ///
			) ///
			xscale(range(12 23)) ///
			xlabel(13(2)22) ///
			yscale(range(-0.01 0.04)) ///
			ylabel(0(0.01)0.04, angle(horizontal)) ///
			xtitle(Age) ///
			ytitle("Additional Syrian Friends (Cumulative)") ///
			scheme(s1color)
		// replace: without it the export aborts whenever the PDF from an earlier run exists.
		graph export "output/side_by_side_min_timeline`aggregation'_`condition'.pdf", replace
	}
	frame change default
}

// The following code block runs through all of the regressions included in the high school table.
// It saves the coefficients, SEs, and Ns to matrices that can be accessed in Mata when the table
// gets put together: reg_<outcome> / n_<outcome> for the baseline specification and
// reg_<outcome>_heterogeneity / n_<outcome>_heterogeneity for the specification that adds the
// interaction of treatment with standardized cohort size. The mean of each outcome among
// untreated observations is stored in the local mean_control_<outcome>.

// Standardize cohort size. The mean and SD are read from r() directly so that no precision is
// lost by formatting them through -display- first.
sum cohort_size
local size_mean = r(mean)
local size_sd = r(sd)
gen cohort_size_stand = (cohort_size - `size_mean') / `size_sd'

gen cohort_size_x_treated = cohort_size_stand * treated
foreach y in "sf"  "sf_not_school" "sf_nc_early" {
	// Baseline specification.
	reghdfe `y' treated, absorb(i.birth_year#i.nuts3_factor school_id) vce(cluster treatment_cluster)
	matrix reg_`y' = r(table)
	matrix n_`y' = e(N)

	// Heterogeneity by standardized cohort size.
	reghdfe `y' treated cohort_size_x_treated, absorb(i.birth_year#i.nuts3_factor school_id) vce(cluster treatment_cluster)
	matrix reg_`y'_heterogeneity = r(table)
	matrix n_`y'_heterogeneity = e(N)

	// Outcome mean among untreated observations (read back in Mata via st_local()).
	sum `y' if treated == 0
	local mean_control_`y' = r(mean)
}



// File name of the table workbook; any copy left in /tmp by an earlier run is removed first.
local excel_file "hs_rd_table.xlsx"
shell rm /tmp/`excel_file'

// This bit of code constructs the table entirely from scratch and converts it to a pdf using openoffice.
// I have already installed Openoffice on devbig122 (which will probably be the only devserver this is run
// on), but if for some reason it isn't working, run the install_openoffice.sh script included in the main
// folder.
// You may need to update the version numbers for the LibreOffice version, since LibreOffice sometimes
// removes the old versions from the website.
mata
// Table layout, defined up front so the table can be rearranged in one place.
// Each index is derived from its neighbour; the resulting values are unchanged.

// Columns: three two-column outcome groups (baseline, heterogeneity) separated by gap columns.
start_sf_col = 2
sf_col = start_sf_col
sf_heterogeneity_col = sf_col + 1
end_sf_col = sf_heterogeneity_col

gap1_col = end_sf_col + 1

start_sf_not_school_col = gap1_col + 1
sf_not_school_col = start_sf_not_school_col
sf_not_school_heterogeneity_col = sf_not_school_col + 1
end_sf_not_school_col = sf_not_school_heterogeneity_col

gap2_col = end_sf_not_school_col + 1

start_sf_nc_early_col = gap2_col + 1
sf_nc_early_col = start_sf_nc_early_col
sf_nc_early_heterogeneity_col = sf_nc_early_col + 1
end_sf_nc_early_col = sf_nc_early_heterogeneity_col

min_col = start_sf_col
max_col = end_sf_nc_early_col

// Column widths and row heights.
reg_col_width = 10
left_label_width = 20
top_row_height = 36
treated_heterogeneity_row_height = 24

// Rows: header in row 1, then coefficient/SE pairs, FE indicators, N and the control mean.
treated_row = 2
treated_se_row = treated_row + 1
treated_heterogeneity_row = treated_se_row + 1
treated_heterogeneity_se_row = treated_heterogeneity_row + 1
school_fe_row = treated_heterogeneity_se_row + 1
birth_county_fe_row = school_fe_row + 1
n_row = birth_county_fe_row + 1
mean_control_row = n_row + 1
max_row = mean_control_row

// Basic setup for the Excel sheet: the workbook is created at /tmp/<excel_file>, where
// <excel_file> is the Stata local of that name.
b = xl()
excel_file = st_local("excel_file")
excel_file = "/tmp/" + excel_file
b.create_book(excel_file, "Sheet1")
b.set_font((1,max_row), (1,max_col), "Calibri", 10)
b.set_sheet("Sheet1")

// Overall Header Setup
b.set_sheet_merge("Sheet1", (1,1), (start_sf_col, end_sf_col))
// The underline spans the whole merged header (start to end column), like the other two groups.
b.set_bottom_border(1,(start_sf_col, end_sf_col), "thin")
b.put_string(1,start_sf_col,"Syrian Friends")
b.set_font_bold(1,start_sf_col,"on")
b.set_column_width(start_sf_col, end_sf_col, reg_col_width)

// Out of school Header Setup
b.set_sheet_merge("Sheet1", (1,1), (start_sf_not_school_col, end_sf_not_school_col))
b.set_bottom_border(1,(start_sf_not_school_col, end_sf_not_school_col), "thin")
b.put_string(1,start_sf_not_school_col,"Syrian Friends`=char(10)'(Excluding Classmates)")
b.set_font_bold(1,start_sf_not_school_col,"on")
b.set_column_width(start_sf_not_school_col, end_sf_not_school_col, reg_col_width)

// No classmates, no friends of Syrian classmates Header Setup
b.set_sheet_merge("Sheet1", (1,1), (start_sf_nc_early_col,max_col))
b.set_bottom_border(1,(start_sf_nc_early_col, end_sf_nc_early_col), "thin")
b.put_string(1,start_sf_nc_early_col, "Syrian Friends`=char(10)'(Excluding Syrian Classmates`=char(10)'and their Friends)")
b.set_font_bold(1,start_sf_nc_early_col,"on")
b.set_column_width(start_sf_nc_early_col, end_sf_nc_early_col, reg_col_width)

b.set_vertical_align((1, max_row), (min_col, max_col), "center")
b.set_horizontal_align((1,max_row), (min_col, max_col), "center")

// Narrow spacer columns between the outcome groups. Only two gap columns are defined in the
// layout, so only those two are resized.
b.set_column_width(gap1_col, gap1_col, 1)
b.set_column_width(gap2_col, gap2_col, 1)

// Header row height, top rule, and the row labels in the first column.
b.set_row_height(1,1,top_row_height)
b.set_top_border(1,(1,max_col),"thick")
b.set_column_width(1,1,left_label_width)
b.put_string(treated_row,1,"Syrian in Cohort")
// The interaction label is split across the coefficient row and its SE row.
b.put_string(treated_heterogeneity_row,1,"Syrian in Cohort x")
b.put_string(treated_heterogeneity_se_row,1,"Standardized Cohort Size")
b.put_string(school_fe_row,1,"School FE")
b.put_string(birth_county_fe_row,1,"Birth Year x County FE")
b.set_bottom_border(birth_county_fe_row,(1,max_col), "thin")
b.put_string(n_row,1,"N")
b.put_string(mean_control_row,1,"Mean in Control Cohort")

// Fill in the Xs for the FEs included in each column (gap columns are skipped)
for (i=2; i<=max_col; i++) {
	if (i==gap1_col | i==gap2_col ) {
		continue
	}
	b.put_string(school_fe_row,i, "X")
	b.put_string(birth_county_fe_row,i, "X")
}


// Fill in all the numbers from the regressions.
//
// Each reg_* matrix is the r(table) saved after the corresponding regression: row 1 holds the
// coefficients and row 2 the standard errors. Column 1 is `treated'; in the heterogeneity
// specifications column 2 is the treatment x standardized-cohort-size interaction.
// Each n_* matrix is a 1x1 matrix holding e(N). Control means come from Stata locals.

// Syrian friends: baseline
reg_sf = st_matrix("reg_sf")
n_sf = st_matrix("n_sf")
b.put_number(treated_row, sf_col, reg_sf[1,1])
b.put_number(treated_se_row, sf_col, reg_sf[2,1])
b.put_number(n_row, sf_col, n_sf[1,1])
b.put_number(mean_control_row, sf_col, strtoreal(st_local("mean_control_sf")))

// Syrian friends: heterogeneity
reg_sf_heterogeneity = st_matrix("reg_sf_heterogeneity")
n_sf_heterogeneity = st_matrix("n_sf_heterogeneity")
b.put_number(treated_row, sf_heterogeneity_col, reg_sf_heterogeneity[1,1])
b.put_number(treated_se_row, sf_heterogeneity_col, reg_sf_heterogeneity[2,1])
b.put_number(treated_heterogeneity_row, sf_heterogeneity_col, reg_sf_heterogeneity[1,2])
b.put_number(treated_heterogeneity_se_row, sf_heterogeneity_col, reg_sf_heterogeneity[2,2])
b.put_number(n_row, sf_heterogeneity_col, n_sf_heterogeneity[1,1])
b.put_number(mean_control_row, sf_heterogeneity_col, strtoreal(st_local("mean_control_sf")))

// Excluding classmates: baseline
reg_sf_not_school = st_matrix("reg_sf_not_school")
// The Stata matrix is named n_sf_not_school (n_ + outcome name); asking for a matrix that does
// not exist returns an empty matrix and makes the [1,1] subscript below fail.
n_sf_not_school = st_matrix("n_sf_not_school")
b.put_number(treated_row, sf_not_school_col, reg_sf_not_school[1,1])
b.put_number(treated_se_row, sf_not_school_col, reg_sf_not_school[2,1])
b.put_number(n_row, sf_not_school_col, n_sf_not_school[1,1])
b.put_number(mean_control_row, sf_not_school_col, strtoreal(st_local("mean_control_sf_not_school")))

// Excluding classmates: heterogeneity
reg_sf_not_school_heterogeneity = st_matrix("reg_sf_not_school_heterogeneity")
n_sf_not_school_heterogeneity = st_matrix("n_sf_not_school_heterogeneity")
b.put_number(treated_row, sf_not_school_heterogeneity_col, reg_sf_not_school_heterogeneity[1,1])
b.put_number(treated_se_row, sf_not_school_heterogeneity_col, reg_sf_not_school_heterogeneity[2,1])
b.put_number(treated_heterogeneity_row, sf_not_school_heterogeneity_col, reg_sf_not_school_heterogeneity[1,2])
b.put_number(treated_heterogeneity_se_row, sf_not_school_heterogeneity_col, reg_sf_not_school_heterogeneity[2,2])
b.put_number(n_row, sf_not_school_heterogeneity_col, n_sf_not_school_heterogeneity[1,1])
b.put_number(mean_control_row, sf_not_school_heterogeneity_col, strtoreal(st_local("mean_control_sf_not_school")))

// Excluding Syrian classmates and their friends: baseline
reg_sf_nc_early = st_matrix("reg_sf_nc_early")
n_sf_nc_early = st_matrix("n_sf_nc_early")
b.put_number(treated_row, sf_nc_early_col, reg_sf_nc_early[1,1])
b.put_number(treated_se_row, sf_nc_early_col, reg_sf_nc_early[2,1])
b.put_number(n_row, sf_nc_early_col, n_sf_nc_early[1,1])
b.put_number(mean_control_row, sf_nc_early_col, strtoreal(st_local("mean_control_sf_nc_early")))


// Excluding Syrian classmates and their friends: heterogeneity
reg_sf_nc_early_heterogeneity = st_matrix("reg_sf_nc_early_heterogeneity")
n_sf_nc_early_heterogeneity = st_matrix("n_sf_nc_early_heterogeneity")
b.put_number(treated_row, sf_nc_early_heterogeneity_col, reg_sf_nc_early_heterogeneity[1,1])
b.put_number(treated_se_row, sf_nc_early_heterogeneity_col, reg_sf_nc_early_heterogeneity[2,1])
b.put_number(treated_heterogeneity_row, sf_nc_early_heterogeneity_col, reg_sf_nc_early_heterogeneity[1,2])
b.put_number(treated_heterogeneity_se_row, sf_nc_early_heterogeneity_col, reg_sf_nc_early_heterogeneity[2,2])
b.put_number(n_row, sf_nc_early_heterogeneity_col, n_sf_nc_early_heterogeneity[1,1])
b.put_number(mean_control_row, sf_nc_early_heterogeneity_col, strtoreal(st_local("mean_control_sf_nc_early")))


// Centre the body rows (first body row through the last row) in every data column.
b.set_horizontal_align((treated_row, max_row), (2,max_col), "center")
rows_to_star = (treated_row, treated_heterogeneity_row)
// This automatically adds the stars corresponding to the significance of the regression coefficients.
// The ratio of each coefficient cell to the standard-error cell directly below it is compared with
// the 1.645 / 1.96 / 2.58 thresholds, and the stars are appended through the cell's number format.
for (j=1; j<=cols(rows_to_star); j++){
	for (i=2; i<=max_col; i++) {
		t = abs(b.get_number(rows_to_star[j], i) / b.get_number(rows_to_star[j]+1, i))
		// Cells without a coefficient (gap columns, or the interaction row in baseline columns)
		// give a missing ratio. Mata orders missing above every number, so without this guard
		// those empty cells would be given the three-star format.
		if (missing(t)) {
			continue
		}
		if(t >= 2.58) {
			stars = "\*\*\*"
		}
		else if (t >= 1.96) {
			stars = "\*\*"
		}
		else if (t >= 1.645) {
			stars = "\*"
		}
		else {
			stars = ""
		}

		b.set_number_format(rows_to_star[j], (i, i), "#,##0.000"+stars+";-#,##0.000"+stars)
	}
}

// These steps have to be done at the end because they will otherwise get overwritten
// Standard errors are shown in parentheses.
b.set_number_format(treated_se_row, (min_col, max_col), "(#,##0.000);(-#,##0.000)")
b.set_number_format(treated_heterogeneity_se_row, (min_col, max_col), "(#,##0.000);(-#,##0.000)")
b.set_number_format(n_row, (min_col, max_col), "number_sep")
b.set_number_format(mean_control_row, (min_col, max_col), "#,##0.000;-#,##0.000")
b.set_bottom_border(max_row,(1,max_col), "thick")
// Align the interaction row to the bottom and its SE row to the top so that the two-line label
// in column 1 reads as a single block.
b.set_vertical_align((treated_heterogeneity_row, treated_heterogeneity_row), (1, max_col), "bottom")
b.set_vertical_align((treated_heterogeneity_se_row, treated_heterogeneity_se_row), (1, max_col), "top")
b.set_row_height(treated_heterogeneity_row,treated_heterogeneity_row,treated_heterogeneity_row_height)
b.close_book()
end

// Convert the workbook to PDF in output/. The Mata block writes the workbook to
// /tmp/`excel_file', so that is the path handed to soffice (no local named `file' is defined
// in this script).
! /usr/local/bin/soffice --invisible --headless --convert-to pdf "/tmp/`excel_file'" --outdir "output/"
